#-*- coding:utf-8 -*-
import requests;
from lxml import etree;
import csv;
import time;
import re;
import mysql.connector

# Database handle shared by the whole script; rows are written through `cursor`
# and made durable with `conn.commit()`.
# NOTE(review): credentials are hard-coded here -- consider loading them from
# the environment or a config file.
conn = mysql.connector.connect(
    user='root',
    password='123456',
    database='jiang',
)
cursor = conn.cursor()

# Request headers sent with every page fetch (a desktop Chrome User-Agent).
header = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}

# Listing URL prefix; the page number is appended to it.
start_url = 'https://cd.lianjia.com/ershoufang/pg'

# Matches a run of 1-7 digits; used to pull the number out of the unit-price text.
unit_price_partten = re.compile(r'\d{1,7}')

# Iterate over the listing pages (pg102 .. pg1000) and store every listing found.

# Placeholders let the driver quote and escape the values itself, so a scraped
# name containing a quote can neither break nor inject into the statement, and
# the prices are no longer truncated to integers by a '%d' string format.
insert_sql = (
    "INSERT INTO house(name,unit_price,total_price,pianqu) "
    "VALUES (%s,%s,%s,%s)"
)


def _first_text(node, path):
    """Return the first XPath result for *path* under *node* as a plain str.

    Returns None when the path matches nothing, so callers can skip the entry
    instead of failing with IndexError.
    """
    matches = node.xpath(path)
    # Convert to an exact `str`: lxml hands back its own str subclass, which a
    # DB driver's parameter converter may not recognise.
    return str(matches[0]) if matches else None


try:
    for i in range(102, 1001):
        url = start_url + str(i)
        # A timeout keeps one stalled connection from hanging the whole crawl.
        html = requests.get(url, headers=header, timeout=30)
        # Pause one second between page requests.
        time.sleep(1)
        selector = etree.HTML(html.text)
        if selector is None:
            # Nothing parseable came back for this page.
            print('page %d: empty response, skipped' % i)
            continue
        # Every listing lives in an <li>; collect all of them.
        xiaoquList = selector.xpath('/html/body/div[4]/div[1]/ul/li')
        # Walk each listing on the page.
        for xiaoqu in xiaoquList:
            name = _first_text(xiaoqu, 'div[1]/div[2]/div/a/text()')
            unit_price_text = _first_text(xiaoqu, 'div[1]/div[6]/div[2]/span/text()')
            total_price_text = _first_text(xiaoqu, 'div[1]/div[6]/div[1]/span/text()')
            pianqu = _first_text(xiaoqu, 'div[1]/div[3]/div/a/text()')
            if None in (name, unit_price_text, total_price_text, pianqu):
                # An <li> without the expected fields (not a regular listing).
                print('page %d: entry with missing fields, skipped' % i)
                continue
            # First run of digits in the unit-price text is the number itself.
            price_match = unit_price_partten.search(unit_price_text)
            if price_match is None:
                print('page %d: no digits in unit price %r, skipped' % (i, unit_price_text))
                continue
            try:
                total_price = float(total_price_text)
            except ValueError:
                print('page %d: bad total price %r, skipped' % (i, total_price_text))
                continue
            unit_price = float(price_match.group())
            item = [name, unit_price, total_price, pianqu]
            cursor.execute(insert_sql, (name, unit_price, total_price, pianqu))
            print('正在抓取%d' % i, item)
        # Commit once per page so a later failure loses at most one page of rows.
        conn.commit()
finally:
    # Always release the database resources, even if the crawl aborts midway.
    cursor.close()
    conn.close()
		

	